import sys
import string
from tqdm import tqdm
# Translation table for str.translate that deletes every character listed in
# string.punctuation (the third maketrans argument names characters to remove).
table = str.maketrans('', '', string.punctuation)

# Path of the input file, taken from the first command-line argument.
# NOTE: this is evaluated at module load; running without an argument raises
# IndexError here.
inpfile = sys.argv[1]
# Fixed output file name; a relative path, so it resolves against the current
# working directory.
outfile = "processed_graph_omcs.csv"

def hasNumbers(inputString):
    """Return True if any character of ``inputString`` is a digit.

    Each element is tested with its own ``isdigit()`` method; an empty
    input yields False.
    """
    for ch in inputString:
        if ch.isdigit():
            # One digit is enough; stop scanning.
            return True
    return False

def process_row(row):
    """Normalize one ``vertex|edge`` line, or return None if it must be dropped.

    The line is stripped of surrounding whitespace and split on ``"|"``.
    The first field is the vertex and the second is the edge; any further
    fields are ignored. Punctuation is removed from the vertex and the
    result is lower-cased.

    Args:
        row: One raw text line from the input file.

    Returns:
        The string ``"<normalized vertex>|<edge>"``, or None when the row is
        rejected because:
          * it has fewer than two ``"|"``-separated fields (e.g. a blank line),
          * the punctuation-stripped vertex contains a digit, or
          * the punctuation-stripped vertex is 3 characters long or shorter.
    """
    tup = row.strip().split("|")
    if len(tup) < 2:
        # Blank or delimiter-less line: there is no edge field. Skip it like
        # any other rejected row instead of raising IndexError on tup[1].
        return None
    vertex, edge = tup[0], tup[1]

    striped = strip_punc(vertex)
    lowered = striped.lower()
    if hasNumbers(lowered):  # vertex contains a digit, we don't care
        return None
    # The length check is done on the string before lower-casing.
    if len(striped) <= 3:  # length less than 4, we don't care
        return None
    return f"{lowered}|{edge}"

def strip_punc(v):
    """Return ``v`` passed through the module-level translation ``table``.

    ``table`` is built from ``string.punctuation``, so those characters are
    deleted from the result; everything else is left as is.
    """
    without_punctuation = v.translate(table)
    return without_punctuation


# Stream the input one line at a time, writing every row that process_row
# accepts (returns non-None for) to the output file, one per line.
# NOTE(review): both files are opened with the platform default encoding;
# confirm that matches the input data.
with open(inpfile, "r") as ifd, open(outfile, "w") as ofd:
    # tqdm wraps the file iterator only to display progress; the row index
    # was never used, so no enumerate() is needed.
    for row in tqdm(ifd):
        prow = process_row(row)
        if prow is not None:
            ofd.write(f"{prow}\n")